# ============================== SETUP ===============================
# Start from a clean global environment: removes every object returned by ls().
# NOTE(review): this does not detach packages or reset options.
rm(list = ls())
# A large scipen penalty biases number printing toward fixed notation
# instead of scientific notation.
options(scipen = 999)
# All relative paths below ("../datasets/...") are resolved from this
# directory. NOTE(review): the path is machine-specific; other users must
# adjust it before running.
setwd("~/Dropbox/Wayne-Ying/White_Nationalist_Recruitment/replication/codes")
# NOTE(review): no dplyr function is called in the visible part of this
# script; presumably needed elsewhere -- confirm before removing.
library(dplyr)
# Fix the RNG seed so the keyword samples drawn below are reproducible.
set.seed(1234)

# ============================== TABLE A4 ===============================
# Dictionary categories shown in Table A4, in row order.
categories <- c("Race", "Nationalism", "Gender", "Partisan", "Religion")

# One keyword CSV per category, named "<lower-case category>keywords.csv"
# inside the dictionaries input folder (same order as `categories`).
files <- file.path(
  "../datasets/input/dictionaries",
  paste0(tolower(categories), "keywords.csv")
)

# Escape LaTeX special characters so text can be placed in a table cell.
#
# Args:
#   text: character vector (other types are coerced to character by gsub).
#
# Returns:
#   Character vector of the same length as `text` in which each of
#   _ & % $ # is prefixed with a single backslash and each literal
#   backslash is replaced by \textbackslash{}.
escape_latex <- function(text) {
  # Backslashes must be handled FIRST. The escapes added below insert
  # backslashes of their own; processing "\" afterwards would double them
  # (e.g. "a_b" -> "a\\_b", which LaTeX reads as a line break followed by a
  # bare underscore). A literal backslash is rendered with \textbackslash{}
  # because "\\" means "line break" in LaTeX.
  text <- gsub("\\", "\\textbackslash{}", text, fixed = TRUE)
  # Prefix every remaining special character with one backslash.
  gsub("([_&%$#])", "\\\\\\1", text)
}

# Read a keyword dictionary and draw a random sample of its entries.
#
# Args:
#   file: path to a CSV file; keywords are taken from its first column.
#   n:    maximum number of keywords to draw (default 20). If the file has
#         fewer rows, all of them are returned in random order.
#
# Returns:
#   Vector of sampled keywords (without replacement) after being passed
#   through escape_latex().
read_and_sample <- function(file, n = 20) {
  df <- read.csv(file, stringsAsFactors = FALSE)
  words <- df[[1]]
  # Sample positions rather than values: sample(x, size) treats a length-one
  # numeric `x` as 1:x, which would invent keywords for a dictionary holding
  # a single numeric entry. For every other input this draws exactly the
  # same values from the RNG as sample(words, size) does.
  picked <- sample.int(length(words), min(n, length(words)))
  escape_latex(words[picked])
}

# Draw the keyword sample for every dictionary (one list element per file).
samples <- lapply(files, read_and_sample)
# Collapse each sample into one comma-separated string. vapply() guarantees
# a character vector with exactly one string per dictionary, whereas
# sapply() changes its return type depending on the input.
formatted_keywords <- vapply(
  samples,
  function(words) paste(words, collapse = ", "),
  character(1)
)
# One row per category; keep both columns as plain character vectors.
latex_table <- data.frame(
  Category = categories,
  Keywords = formatted_keywords,
  stringsAsFactors = FALSE
)

# Assemble the LaTeX source for the table: a fixed preamble, one row per
# category, and a fixed closing section, joined with newlines. local() keeps
# the intermediate pieces out of the global environment.
latex_code <- local({
  header <- c(
    "\\begin{table}[h]",
    "\\centering",
    "\\renewcommand{\\arraystretch}{1.2}",
    "\\begin{tabular}{|l|p{10cm}|}",
    "\\hline",
    "\\textbf{Category} & \\textbf{Sampled Keywords} \\\\",
    "\\hline"
  )
  # Bold category name, then its sampled keywords, ending the row with "\\".
  body <- paste0(
    "\\textbf{", latex_table$Category, "} & ",
    latex_table$Keywords, " \\\\",
    collapse = "\n"
  )
  footer <- c(
    "\\hline",
    "\\end{tabular}",
    "\\caption{Randomly sampled keywords from each dictionary.}",
    "\\label{tab:keywords}",
    "\\end{table}"
  )
  paste(c(header, body, footer), collapse = "\n")
})

# Print the table source to the console (no trailing newline is added).
cat(latex_code)